#!/usr/bin/env python
# coding: utf-8

# In[11]:


import requests
import lxml.etree as le
import pandas as pd
import requests
import lxml.etree as le
import pandas as pd
import xlwt
# Page whose left-hand navigation column is scraped.
url = 'https://www.runoob.com/html/html-tutorial.html'

# XPath for the link captions and the link targets of every <a> that is a
# direct child of <div id="leftcolumn">.
x = '//div[@id="leftcolumn"]/a/text()'
y = '//div[@id="leftcolumn"]/a/@href'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the real document.
response = requests.get(url=url, timeout=10)
response.raise_for_status()
content = response.content

# Parse the raw bytes into an lxml HTML tree.
contentx = le.HTML(content)

# rets: caption strings, la: href strings.
rets = contentx.xpath(x)
la = contentx.xpath(y)


# In[12]:


from urllib.parse import urljoin

# Link captions with surrounding whitespace removed.
data1 = [category1.strip() for category1 in rets]

# Resolve every href against the page URL. Plain string concatenation would
# append the href after ".../html-tutorial.html" and yield an invalid link;
# urljoin handles relative, root-relative and absolute hrefs correctly.
data2 = [urljoin(url, category2.strip()) for category2 in la]

# Pair each caption with its link. NOTE: zip() stops at the shorter list, so
# the pairing relies on rets and la having one entry per anchor.
data = list(zip(data1, data2))

# Passing the column names to the constructor also works when nothing was
# scraped (empty table with both columns) instead of failing on assignment.
date = pd.DataFrame(data, columns=['contents', 'contents_url'])
date


# In[15]:


# Write the table to an Excel workbook. The context manager saves and closes
# the file on exit, even if to_excel raises; ExcelWriter.save() was
# deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('保存数据.xlsx') as writer:
    date.to_excel(writer, sheet_name='提取名')
'写入完毕'

